County Results

theme_c <- function(...){ 
   # font <- "Helvetica"   #assign font family up front
    theme_bw() %+replace%    #replace elements we want to change
    
    theme(
      
      
      #text elements
      plot.title = element_text(             #title
                   size = 16,                #set font size
                   face = 'bold',            #bold typeface
                   hjust = .5,
                   vjust = 3),               
      
      plot.subtitle = element_text(          #subtitle
                   size = 12,
                   hjust = .5,
                   face = 'italic',
                   vjust = 3),               #font size
      
      axis.title = element_text(             #axis titles
                   size = 12),               #font size
      
      axis.text = element_text(              #axis text
                   size = 9),
      legend.text = element_text(size = 12),
      # t, r, b, l
      plot.margin = unit(c(1,.5,.5,.5), "cm"),
      legend.position = "right",
      strip.text.x = element_text(size = 11, face = "bold", color="white"),
      strip.text.y = element_text(size = 11, face = "bold", color="white",angle=270),
      strip.background = element_rect(fill = "#3E3D3D")
      ) %+replace%
      theme(...)
   
}
targets_dir <- here("_targets")

county <- tar_read(ma_v1, 
                   store =targets_dir) %>% 
  mutate(version = "v1") %>%
  bind_rows(
    tar_read(ma_v2,
             store =targets_dir)
  ) %>%
   bind_rows(
    tar_read(ma_v3, 
             store =targets_dir)
  ) %>%
   bind_rows(
    tar_read(ma_v4, 
             store =targets_dir)
  )%>%
   bind_rows(
    tar_read(ma_v5,
             store =targets_dir)
  )%>%
   bind_rows(
    tar_read(ma_v6,
             store =targets_dir)
   )

waste <- tar_read(waste, store =targets_dir)
covidestim <- tar_read(covidestim_biweekly_county,
                        store =targets_dir)

dates <- readRDS(here("data/data_raw/date_to_biweek.RDS"))
wjoined <- county %>%
  # set Nantucket, Duke fips to Nantucket since we have wastewater data
  # for Nantucket
  mutate(fips = ifelse(grepl("25019", fips), "25019", fips)) %>%
  inner_join(waste) %>%
  left_join(covidestim, relationship = "many-to-many") %>%
  group_by(biweek) %>%
  mutate(mindate = min(date),
         maxdate = max(date)) %>%
  ungroup()

counties_with_waste <- wjoined %>%
  group_by(fips) %>%
  mutate(obs_w_notna = sum(!is.na(mean_conc))) %>%
  filter(obs_w_notna != 0) %>%
  pull(fips) %>%
  unique()



versions <- tibble(
  version = c("v1", "v2", "v3", "v4", "v5", "v6"),
  vlabel = factor(
    c( "$Priors\\,Do\\,Not\\,Vary\\,by\\,County\\,and\\,Date$", 
    "$\\beta$ Centered at Emp. Value",
    "$P(S_1|untested)$ Centered at Emp. Value",
    "$P(S_1|untested)$ and $\\beta$ Centered at Emp. Values",
        "$\\beta$ Centered at Emp. Value (Spline Smoothed)",
         "$P(S_1|untested)$ and $\\beta$ Centered at Emp. Values ($\\beta$ Spline Smoothed)"
    ),
    levels = c(
        "$Priors\\,Do\\,Not\\,Vary\\,by\\,County\\,and\\,Date$", 
        "$\\beta$ Centered at Emp. Value",
         "$P(S_1|untested)$ Centered at Emp. Value",
        "$P(S_1|untested)$ and $\\beta$ Centered at Emp. Values",
        "$\\beta$ Centered at Emp. Value (Spline Smoothed)",
         "$P(S_1|untested)$ and $\\beta$ Centered at Emp. Values ($\\beta$ Spline Smoothed)"
       
    ) )
)

wjoined <- wjoined %>%
  left_join(versions)


w_data <- wjoined %>% 
  select(-date) %>%
  distinct() %>%
  filter(fips %in% counties_with_waste) %>%
  filter(!is.na(mean_conc)) %>%
  group_by(fips, version) %>%
  mutate(mean_conc_std = mean_conc/ max(mean_conc),
         exp_cases_median_std = exp_cases_median / max(exp_cases_median),
         exp_cases_lb_std = exp_cases_lb /max(exp_cases_median),
         exp_cases_ub_std = exp_cases_ub / max(exp_cases_median),
         infections_std  = infections/max(infections),
         positive_std = positive/max(positive)) %>%
  ungroup()

Correlations

Normalization Strategy 1: By Values at Biweek Where Wastewater Concentration is Maximized

w_data <- wjoined %>% 
  select(-date) %>%
  distinct() %>%
  filter(fips %in% counties_with_waste) %>%
  filter(!is.na(mean_conc)) %>%
  group_by(fips, version) %>%
  mutate(mean_conc_std = mean_conc/ max(mean_conc),
         max_conc = max(mean_conc),
         exp_cases_median_std = exp_cases_median / exp_cases_median[which.max(mean_conc)],
         exp_cases_lb_std = exp_cases_lb /exp_cases_median[which.max(mean_conc)],
         exp_cases_ub_std = exp_cases_ub / exp_cases_median[which.max(mean_conc)],
         infections_std  = infections/ infections[which.max(mean_conc)],
         positive_std = positive/max(positive)) %>%
  ungroup()

PB Counts and Wastewater

# correlations between PB counts and wastewater concentrations

correlations <- w_data %>%
  group_by(version) %>%
  summarize(Correlation = cor(exp_cases_median_std,mean_conc_std),
            vlabel =unique(vlabel)) %>%
  mutate(x=1.8,
         y=.1)

w_data %>%
  group_by(version) %>%
  ggplot(aes(x=exp_cases_median_std, y = mean_conc_std)) +
  geom_point(alpha=.5) +
  geom_linerange(aes(xmin = exp_cases_lb_std,
                     xmax=exp_cases_ub_std, 
                     y = mean_conc_std),
                 alpha = .3) +
  geom_text(
           aes(x=x,y=y,  label=paste0("Correlation: ", 
                                      round(Correlation,3))), 
                data= correlations ) +
  facet_wrap(~vlabel,
             ncol=2,
             labeller= as_labeller(TeX, 
                                   default = label_parsed)) +
  theme_c() +
  labs(x = "Probabilistic Bias Counts\n(Standardized)",
       y = "Mean Wastewater Concentration\n(Standardized)",
       title = "Correlations Between Probabilistic Bias Counts\nand Mean Wastewater Concentration")

plt_wastewater_pb <- w_data %>%
  group_by(version) %>%
  ggplot(aes(x=exp_cases_median_std, y = mean_conc_std)) +
  geom_point(alpha=.5) +
  geom_linerange(aes(xmin = exp_cases_lb_std,
                     xmax=exp_cases_ub_std, 
                     y = mean_conc_std),
                 alpha = .3) +
  geom_text(
           aes(x=x,y=y,  label=paste0("Correlation: ", 
                                      round(Correlation,3))), 
                data= correlations ) +
  facet_wrap(~vlabel,
             ncol=1,
             labeller= as_labeller(TeX, 
                                   default = label_parsed)) +
  theme_c() +
  labs(x = "Probabilistic Bias Counts\n(Standardized)",
       y = "Mean Wastewater Concentration\n(Standardized)",
       title = "")

PB Counts and Covidestim

# correlations between PB counts and Covidestim estimates


covidestim_correlations <- w_data %>%
  filter(!is.na(infections_std)) %>%
  group_by(vlabel) %>%
  # missing values are handled by casewise deletion
  summarize(Correlation = cor(exp_cases_median_std,infections_std,
                              use = "complete.obs")) %>%
  mutate(x=1.8,
         y=.1)



w_data %>%
  ggplot(aes(x=exp_cases_median_std, y = infections_std)) +
  geom_point(alpha=.5) +
  geom_linerange(aes(xmin = exp_cases_lb_std,
                     xmax=exp_cases_ub_std, 
                     y = infections_std),
                 alpha = .3) +
  geom_text(
           aes(x=x,y=y,  label=paste0("Correlation: ", 
                                      round(Correlation,3))), 
                data= covidestim_correlations ) +
  facet_wrap(~vlabel,
              ncol=2,
             labeller= as_labeller(TeX, 
                                   default = label_parsed)) +
  theme_c() +
  labs(x = "Probabilistic Bias Counts\n(Standardized)",
       y = "Covidestim Estimates\n(Standardized)",
       title = "Correlations Between Probabilistic Bias Counts\nand Covidestim Estimates")

plt_covidestim_pb <- w_data %>%
  ggplot(aes(x=exp_cases_median_std, y = infections_std)) +
  geom_point(alpha=.5) +
  geom_linerange(aes(xmin = exp_cases_lb_std,
                     xmax=exp_cases_ub_std, 
                     y = infections_std),
                 alpha = .3) +
  geom_text(
           aes(x=x,y=y,  label=paste0("Correlation: ", 
                                      round(Correlation,3))), 
                data= covidestim_correlations ) +
  facet_wrap(~vlabel,
              ncol=1,
             labeller= as_labeller(TeX, 
                                   default = label_parsed)) +
  theme_c() +
  labs(x = "Probabilistic Bias Counts\n(Standardized)",
       y = "Covidestim Estimates\n(Standardized)",
       title = "")

Combined Figure

cowplot::plot_grid(plt_wastewater_pb, plt_covidestim_pb, ncol=2)

cor_covid <- covidestim_correlations %>% 
  select(vlabel, 
         `Correlation with Covidestim Estimates`=Correlation)

cor_waste <- correlations %>% 
  select(vlabel, 
         `Correlation with Wastewater Concentrations`=Correlation)


cor_waste %>%
  left_join(cor_covid) %>%
  mutate(vlabel = gsub(
    "$Priors\\,Do\\,Not\\,Vary\\,by\\,County\\,and\\,Date$", 
    "Priors Do Not Vary by County and Date",
    vlabel, fixed=TRUE),vlabel=gsub(" at","\nat", vlabel)) %>%
  arrange(desc( `Correlation with Wastewater Concentrations`)) %>%
  rename(Implementation = vlabel) %>%
  kbl() %>%
  kable_styling(full_width = TRUE)
Implementation Correlation with Wastewater Concentrations Correlation with Covidestim Estimates
\(\beta\) Centered at Emp. Value (Spline Smoothed) 0.8326628 0.9740849
Priors Do Not Vary by County and Date 0.8292794 0.9812435
\(P(S_1|untested)\) Centered at Emp. Value 0.8258524 0.9786722
\(P(S_1|untested)\) and \(\beta\) Centered at Emp. Values (\(\beta\) Spline Smoothed) 0.8248512 0.9636615
\(\beta\) Centered at Emp. Value 0.7898058 0.9511692
\(P(S_1|untested)\) and \(\beta\) Centered at Emp. Values 0.7755184 0.9348053

Covidestim and Wastewater

covidestim_correlations <- w_data %>%
  filter(version=="v1") %>%
  # missing values are handled by casewise deletion
  summarize(Correlation = cor(mean_conc_std,infections_std,
                              use = "complete.obs")) %>%
  mutate(x=1,
         y=.1) 



w_data %>%
  filter(version=="v1") %>%
  ggplot(aes(x=infections_std, y = mean_conc_std)) +
  geom_point(alpha=.5) +
  geom_text(
           aes(x=x,y=y,  label=paste0("Correlation: ", 
                                      round(Correlation,3))), 
                data= covidestim_correlations ) +
  theme_c() +
  labs(x = "Covidestim Estimates\n(Standardized)",
       y = "Wastewater Concentrations\n(Standardized)",
       title = "Correlations Between Probabilistic Bias Counts\nand Covidestim Estimates")

pb_correlations <- w_data %>%
  filter(version=="v1") %>%
  # missing values are handled by casewise deletion
  summarize(Correlation = cor(positive_std,mean_conc_std,
                              use = "complete.obs")) %>%
  mutate(x=1.8,
         y=.1) 

Compare to Wastewater Over Time

library(scales)

pal <- c("#10BAC5", "#1B10C5", "#EFB719", "#900C3F")

names(pal) <- c(#"$observed$", 
                '$P(S_1|untested)$ Centered at Emp. Value',
                '$P(S_1|untested)$ and $\\beta$ Centered at Emp. Values',
               "$Priors\\,Do\\,Not\\,Vary\\,by\\,County\\,and\\,Date$",
               "$\\beta$ Centered at Emp. Value"
               )


waste_df <- w_data %>%
  select(biweek, mean_conc_std, name) %>%
  distinct() %>%
  left_join(dates,relationship = "many-to-many" ) %>%
  group_by(name, biweek) %>%
  # center date within 2-week interval
  summarize(date = min(date) + days(7),
            mean_conc_std=unique(mean_conc_std)) %>%
  mutate(name = gsub("County, MA", "", name))


w_data%>%
  mutate(name = gsub("County, MA", "", name)) %>%
  left_join(dates,relationship = "many-to-many" ) %>%
  filter(version %in% c("v1","v3")) %>%
  filter(biweek>=6) %>%
  group_by(biweek) %>%
  mutate(mindate = min(date),
         maxdate=max(date)) %>%
  ungroup() %>% 
 #   filter(fips == county_fips  & date >= begin_date & date <= end_date) %>%
    ggplot() +
      geom_ribbon(aes(x = date, 
                 ymin = exp_cases_lb_std,
                 ymax = exp_cases_ub_std,
                 fill = vlabel),
                 alpha = .5) +
      geom_line(data = waste_df,
                aes(x = date, y =mean_conc_std, 
                    color = 'Wastewater Effective Concentrations'),
               # color = "#DB4048",
                size = 1.1) +
      geom_linerange(aes(xmin = mindate, 
                         xmax = maxdate, 
                         y = infections_std,
                         color = 'Covidestim Infection Estimates')) + 
      geom_point(data = waste_df,
                aes(x = date, y =mean_conc_std ),
                color = "#DB4048",
                alpha = .5,
                size = 1.2) +
      facet_grid(name~vlabel, 
                 labeller=labeller(vlabel =as_labeller(
                   TeX, default=label_parsed))) +
      scale_fill_manual(values = pal, labels = c('','','',''), 
                        name = "Probabilistic Bias Intervals") +
      theme_bw() +
     theme_c() +
      labs(y = "Value (Standardized)",
           title = "",
           x= "") +
      scale_x_date(date_labels = "%b %Y") +
    scale_color_manual(
      name = '', 
      values = c(
        'Covidestim Infection Estimates' = 'darkblue',
        'Wastewater Effective Concentrations' = '#DB4048')) +
    guides(color = guide_legend(override.aes = list(size = 12,
                                                    linewidth=3)),
           fill = guide_legend(override.aes =list(size = 6)))

Normalization Strategy 2: By Maximum (Separately)

For each county, normalize the wastewater concentration by the maximum concentration for that county and the probabilistic bias counts by the maximum median estimated counts for that county (2.5th percentile and 97.5th percentile also standardized by the maximum median estimated counts for that county).

PB Counts and Wastewater

# correlations between PB counts and wastewater concentrations

correlations <- w_data %>%
  group_by(version) %>%
  summarize(Correlation = cor(exp_cases_median_std,mean_conc_std),
            vlabel =unique(vlabel)) %>%
  mutate(x=1.8,
         y=.1)

w_data %>%
  group_by(version) %>%
  ggplot(aes(x=exp_cases_median_std, y = mean_conc_std)) +
  geom_point(alpha=.5) +
  geom_linerange(aes(xmin = exp_cases_lb_std,
                     xmax=exp_cases_ub_std, 
                     y = mean_conc_std),
                 alpha = .3) +
  geom_text(
           aes(x=x,y=y,  label=paste0("Correlation: ", 
                                      round(Correlation,3))), 
                data= correlations ) +
  facet_wrap(~vlabel,
             ncol=2,
             labeller= as_labeller(TeX, 
                                   default = label_parsed)) +
  theme_c() +
  labs(x = "Probabilistic Bias Counts\n(Standardized)",
       y = "Mean Wastewater Concentration\n(Standardized)",
       title = "Correlations Between Probabilistic Bias Counts\nand Mean Wastewater Concentration")

plt_wastewater_pb <- w_data %>%
  group_by(version) %>%
  ggplot(aes(x=exp_cases_median_std, y = mean_conc_std)) +
  geom_point(alpha=.5) +
  geom_linerange(aes(xmin = exp_cases_lb_std,
                     xmax=exp_cases_ub_std, 
                     y = mean_conc_std),
                 alpha = .3) +
  geom_text(
           aes(x=x,y=y,  label=paste0("Correlation: ", 
                                      round(Correlation,3))), 
                data= correlations ) +
  facet_wrap(~vlabel,
             ncol=1,
             labeller= as_labeller(TeX, 
                                   default = label_parsed)) +
  theme_c() +
  labs(x = "Probabilistic Bias Counts\n(Standardized)",
       y = "Mean Wastewater Concentration\n(Standardized)",
       title = "")

PB Counts and Covidestim

# correlations between PB counts and Covidestim estimates


covidestim_correlations <- w_data %>%
  filter(!is.na(infections_std)) %>%
  group_by(vlabel) %>%
  # missing values are handled by casewise deletion
  summarize(Correlation = cor(exp_cases_median_std,infections_std,
                              use = "complete.obs")) %>%
  mutate(x=1.8,
         y=.1)



w_data %>%
  ggplot(aes(x=exp_cases_median_std, y = infections_std)) +
  geom_point(alpha=.5) +
  geom_linerange(aes(xmin = exp_cases_lb_std,
                     xmax=exp_cases_ub_std, 
                     y = infections_std),
                 alpha = .3) +
  geom_text(
           aes(x=x,y=y,  label=paste0("Correlation: ", 
                                      round(Correlation,3))), 
                data= covidestim_correlations ) +
  facet_wrap(~vlabel,
             ncol=2,
             labeller= as_labeller(TeX, 
                                   default = label_parsed)) +
  theme_c() +
  labs(x = "Probabilistic Bias Counts\n(Standardized)",
       y = "Covidestim Estimates\n(Standardized)",
       title = "Correlations Between Probabilistic Bias Counts\nand Covidestim Estimates")

plt_covidestim_pb <- w_data %>%
  ggplot(aes(x=exp_cases_median_std, y = infections_std)) +
  geom_point(alpha=.5) +
  geom_linerange(aes(xmin = exp_cases_lb_std,
                     xmax=exp_cases_ub_std, 
                     y = infections_std),
                 alpha = .3) +
  geom_text(
           aes(x=x,y=y,  label=paste0("Correlation: ", 
                                      round(Correlation,3))), 
                data= covidestim_correlations ) +
  facet_wrap(~vlabel,
              ncol=1,
             labeller= as_labeller(TeX, 
                                   default = label_parsed)) +
  theme_c() +
  labs(x = "Probabilistic Bias Counts\n(Standardized)",
       y = "Covidestim Estimates\n(Standardized)",
       title = "")

Combined Figure

cowplot::plot_grid(plt_wastewater_pb, plt_covidestim_pb, ncol=2)

Covidestim and Wastewater

covidestim_correlations <- w_data %>%
  filter(version=="v1") %>%
  # missing values are handled by casewise deletion
  summarize(Correlation = cor(mean_conc_std,infections_std,
                              use = "complete.obs")) %>%
  mutate(x=1,
         y=.1) 



w_data %>%
  filter(version=="v1") %>%
  ggplot(aes(x=infections_std, y = mean_conc_std)) +
  geom_point(alpha=.5) +
  geom_text(
           aes(x=x,y=y,  label=paste0("Correlation: ", 
                                      round(Correlation,3))), 
                data= covidestim_correlations ) +
  theme_c() +
  labs(x = "Covidestim Estimates\n(Standardized)",
       y = "Wastewater Concentrations\n(Standardized)",
       title = "Correlations Between Probabilistic Bias Counts\nand Covidestim Estimates")

pb_correlations <- w_data %>%
  filter(version=="v1") %>%
  # missing values are handled by casewise deletion
  summarize(Correlation = cor(positive_std,mean_conc_std,
                              use = "complete.obs")) %>%
  mutate(x=1,
         y=.1)